Normal Start Functions

Load isofiles

isofiles <-
  file.path(
    "Results",
    c(
      "180223_alkanes_C_GB",
      "180226_alkanes_C_GB"
    )
  ) %>%
  iso_read_continuous_flow()
## # A tibble: 12 x 4
##    file_id                  type  func           details                  
##    <chr>                    <chr> <chr>          <chr>                    
##  1 486__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  2 486__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  3 487__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  4 487__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  5 488__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  6 488__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  7 511__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  8 511__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  9 512__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 10 512__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 11 513__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 12 513__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…

Data

File Info

# all file info
isofiles %>% iso_get_file_info()
## Info: aggregating file info from 52 data file(s)

Vendor data table

isofiles %>% iso_get_vendor_data_table()
## Info: aggregating vendor data table without units from 52 data file(s)

Chromatograms

isofiles %>% 
  # fetch a few of the files of interst
  iso_filter_files(parse_number(Analysis) %in% c(525, 528, 530)) %>% 
  # plot just mass 28
  iso_plot_continuous_flow_data(data = c(46)) %>% 
  # make interactive
  ggplotly(dynamicTicks = TRUE)
## Warning: You need the dev version of ggplot2 to use `dynamicTicks`

Analysis

Select relevant data

data_table <-
  isofiles %>%
  # filter the files you want to use 
  # --> exclude CO2 zeros
  iso_filter_files(!str_detect(`Identifier 1`, "CO2 zero")) %>% 
  # --> select only good analyses
  iso_filter_files(parse_number(Analysis) > 490) %>% 
  # get the vendor data table and file info
  iso_get_vendor_data_table(
    select = c(
      # peak info
      Nr., Start, Rt, End,
      # amplitudes and intensities
      `Ampl 44`, `Ampl 46`, `Intensity All`, `Intensity 44`,
      # ratios and deltas
      `R 46CO2/44CO2`, `d 13C/12C`
    ),
    include_file_info = c(
      file_datetime, Analysis, `Identifier 1`, `Identifier 2`, `GC Method`, `Seed Oxidation`
    )
  ) %>% 
  # rename some columns to be easier to work with
  rename(
    Ampl44=`Ampl 44`, Ampl46=`Ampl 46`, # amplitudes
    #Area28=`rIntensity 28`, Area29=`rIntensity 29`, # areas
    Intensity=`Intensity All`, #peak intensities
    R46C = `R 46CO2/44CO2`, d13C = `d 13C/12C`, # ratio and delta values
    rt = `Rt`,
    rt_start = `Start`,
    rt_end = `End`
    #File = `file_id`
  )
## Info: applying file filter, keeping 41 of 52 files
## Info: applying file filter, keeping 36 of 41 files
## Info: aggregating vendor data table without units from 36 data file(s), including file info 'c(file_datetime, Analysis, `Identifier 1`, `Identifier 2`, `GC Method`, 
##     `Seed Oxidation`)'
data_table

Map peaks

### CHANGE MAPPING FILE NAME ###
metadata_samples <- read_excel(file.path("metadata", "20180226_GB_metadata.xlsx"), sheet = "files")

metadata_peak_maps <- read_excel(file.path("metadata","20180226_GB_metadata.xlsx"), sheet = "maps")

metadata_samples
data_table_with_peaks <- data_table %>%
    iso_add_metadata(metadata_samples, match_by = c(`Identifier 1`, Analysis)) %>%
    iso_map_peaks(metadata_peak_maps) 
## Info: metadata added to 758 data rows, 0 left without metadata:
## - 1 metadata entries were mapped to 758 data entires via column 'Identifier 1'
## Info: 734 peaks in 33 files were successfully assigned, 24 could not be assigned, and 124 were missing
# missing and unidentified peaks
data_table_with_peaks %>% filter(!is_identified | is_missing)
# confirmed peaks
data_table_with_peaks <- data_table_with_peaks %>%
    filter(is_identified, !is_missing, !is_ambiguous)

data_table_with_peaks

Check stability of reference peaks

The second one is the one defined to be 0 permil (so will always be), the rest is relative to that peak.

p <- data_table_with_peaks %>% 
  filter(is_ref_peak == "yes") %>% 
  ggplot() + 
  aes(Nr., d13C, color = file_id) +
  geom_line() + 
  theme_bw()
ggplotly(p)

Add standard values

standards <- read_excel(file.path("metadata", "gc_irms_indiana_A6.xlsx"))
data_w_stds <- data_table_with_peaks %>% 
  filter(type == "standard", is_ref_peak == "no") %>% 
  left_join(standards, by = "compound") %>% 
  mutate(is_std = !is.na(true.d13C) | !is.na(true.d2H))

Process data

Focus on the analytes and calculate a few summary parameters we want to use later.

data_w_analyte_peaks <- 
  data_table_with_peaks %>% 
  # this is important so that the reference peaks are not caught up in the next set of calculations
  filter(is_ref_peak == "no") %>% 
  # for each analysis calculate averages across analysis
  group_by(Analysis) %>% 
  mutate(
    ampl_sample_mean.mV = mean(`Ampl44`), ampl_sample_sd.mV = sd(`Ampl44`),
    area_sample_mean.Vs = mean(`Intensity 44`), area_sample_sd.Vs = sd(`Intensity 44`)
  )

Standards

standards <- read_excel(file.path("metadata", "gc_irms_indiana_A6.xlsx"))
kable(standards)
compound true.d2H true.d13C
C16 -9.1 -26.15
C17 -117.8 -31.88
C18 -52.0 -32.70
C19 -56.3 -31.99
C20 -89.7 -33.97
C21 -177.8 -28.83
C22 -81.3 -33.77
C23 -67.2 -33.37
C24 -29.7 -32.13
C25 -263.0 -28.46
C26 -45.9 -32.94
C27 -172.8 -30.49
C28 -36.8 -33.20
C29 -177.8 -29.10
C30 -213.6 -29.84
data_w_stds <-
  data_w_analyte_peaks %>% 
  iso_add_standards(standards)  
## Info: added 15 standard entries to 371 out of 371 rows

Visualize standards

v <- data_w_stds %>% 
  ggplot() +
  aes(x = true.d13C, y = d13C, color = file_id) + 
  geom_smooth(method = "lm", se = FALSE, alpha = 0.5) +
  geom_point() +
  theme_bw() +
  theme(legend.position = "none") 
ggplotly(v)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Calibration

data_w_calibs <- data_w_stds %>% 
  # prepare for calibration by defining the grouping column(s) and setting default parameters
  iso_prepare_for_calibration(group_by = c(Analysis)) %>% 
  iso_set_default_process_parameters(delta_residual = resid_d13C) %>% 
  # run calibration
  iso_calibrate_delta(model = lm(`d13C` ~ true.d13C)) %>% 
  # pull out some columns we want generally available
  iso_unnest_calib_data(select = c(starts_with("Id"), ampl_sample_mean.mV)) 
## Info: preparing data for calibration by grouping based on 'Analysis' and nesting the grouped datasets into 'calibration_data'
## Info: calculating delta calibration fits based on 1 model ('lm(d13C ~ true.d13C)') for 29 data group(s) in 'calibration_data' with filter 'is_standard'; storing residuals in 'resid_d13C.'
data_w_calibs %>% 
  iso_unnest_delta_calib_summary(keep_other_list_data = FALSE) %>% 
  kable(digits =3)
Analysis Identifier 1 Identifier 2 ampl_sample_mean.mV r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual
491 A6 1.0uL 860.510 0.932 0.926 0.643 177.132 0.000 2 -13.598 33.196 35.320 5.383 13
492 A6 1.5uL 1473.378 0.974 0.972 0.382 485.076 0.000 2 -5.771 17.543 19.667 1.896 13
493 A6 2.0uL 1809.000 0.983 0.982 0.297 764.643 0.000 2 -2.005 10.010 12.134 1.147 13
494 A6 3uL 2594.962 0.989 0.988 0.246 1187.994 0.000 2 0.833 4.334 6.458 0.786 13
496 A6 0.5uL 354.690 0.852 0.840 1.021 69.299 0.000 2 -19.082 44.164 46.081 12.518 12
497 A6 1.0uL 992.991 0.957 0.953 0.508 286.019 0.000 2 -10.051 26.102 28.226 3.355 13
498 A6 1.5uL 1551.272 0.979 0.977 0.335 606.561 0.000 2 -3.812 13.624 15.748 1.460 13
499 A6 2.0uL 2001.386 0.986 0.985 0.271 925.837 0.000 2 -0.629 7.259 9.383 0.955 13
500 A6 3uL 2469.883 1.000 1.000 0.049 31271.358 0.000 2 24.997 -43.993 -41.869 0.031 13
502 A6 0.5uL 410.170 0.925 0.917 0.611 111.506 0.000 2 -9.083 24.166 25.359 3.358 9
503 A6 1.0uL 1057.313 0.974 0.972 0.378 480.999 0.000 2 -5.617 17.235 19.359 1.857 13
504 A6 1.5uL 1869.148 0.989 0.988 0.243 1155.148 0.000 2 1.032 3.937 6.061 0.765 13
505 A6 2.0uL 2321.932 0.994 0.994 0.179 2169.583 0.000 2 5.578 -5.156 -3.032 0.417 13
506 A6 3uL 4059.189 1.000 1.000 0.031 76319.605 0.000 2 31.736 -57.472 -55.348 0.013 13
514 A6 0.5uL 135.952 0.846 0.823 1.125 38.309 0.000 2 -12.700 31.399 31.991 8.859 7
515 A6 0.5uL 251.494 0.849 0.824 1.192 33.707 0.001 2 -11.603 29.206 29.444 8.519 6
517 A6 0.5uL 397.946 0.870 0.854 0.859 53.467 0.000 2 -11.551 29.102 30.010 5.900 8
518 A6 1.0uL 1028.965 0.853 0.839 0.956 63.715 0.000 2 -16.780 39.560 41.255 10.060 11
519 A6 2.0uL 2044.818 0.932 0.926 0.591 151.570 0.000 2 -10.522 27.044 28.739 3.841 11
520 A6 3uL 3075.854 0.978 0.976 0.323 485.383 0.000 2 -2.650 11.300 12.995 1.144 11
522 A6 0.5uL 446.709 0.914 0.902 0.716 74.560 0.000 2 -8.637 23.275 23.866 3.592 7
523 A6 1.0uL 1054.275 0.894 0.884 0.788 92.529 0.000 2 -14.265 34.530 36.225 6.832 11
524 A6 2.0uL 2062.263 0.929 0.922 0.613 143.730 0.000 2 -11.005 28.010 29.705 4.138 11
525 A6 3uL 3596.214 0.992 0.991 0.192 1324.835 0.000 2 4.092 -2.184 -0.489 0.406 11
526 A6 0.2uL 57.897 0.905 0.857 0.616 18.999 0.049 2 -2.353 10.707 8.866 0.760 2
527 A6 0.5uL 476.656 0.904 0.890 0.796 65.928 0.000 2 -9.589 25.179 25.770 4.438 7
528 A6 1.0uL 1144.098 0.920 0.913 0.657 127.265 0.000 2 -11.905 29.810 31.505 4.752 11
529 A6 2.0uL 2158.592 0.947 0.943 0.513 198.511 0.000 2 -8.686 23.373 25.068 2.896 11
530 A6 3uL 3608.378 0.995 0.995 0.142 2409.987 0.000 2 8.019 -10.038 -8.343 0.222 11
data_w_calibs %>% 
  # pull out seed oxidation in addition
  iso_unnest_calib_data(select = `Seed Oxidation`) %>% 
  iso_unnest_delta_calib_coefs(select = c(-statistic), keep_other_list_data = FALSE) %>% 
  # arrange by term and Analysis to get a quick idea of what the numbers across analyses
  arrange(term, Analysis) %>% 
  kable(digits = 2)
Analysis Identifier 1 Identifier 2 ampl_sample_mean.mV Seed Oxidation term estimate std.error p.value signif
491 A6 1.0uL 860.51 1 (Intercept) 8.72 2.32 0.00 **
492 A6 1.5uL 1473.38 1 (Intercept) 8.79 1.38 0.00 ***
493 A6 2.0uL 1809.00 1 (Intercept) 8.25 1.07 0.00 ***
494 A6 3uL 2594.96 1 (Intercept) 9.34 0.89 0.00 ***
496 A6 0.5uL 354.69 1 (Intercept) 8.86 3.79 0.04 *
497 A6 1.0uL 992.99 1 (Intercept) 9.37 1.83 0.00 ***
498 A6 1.5uL 1551.27 1 (Intercept) 8.50 1.21 0.00 ***
499 A6 2.0uL 2001.39 1 (Intercept) 8.61 0.98 0.00 ***
500 A6 3uL 2469.88 1 (Intercept) 10.28 0.18 0.00 ***
502 A6 0.5uL 410.17 1 (Intercept) 13.80 3.41 0.00 **
503 A6 1.0uL 1057.31 1 (Intercept) 8.56 1.36 0.00 ***
504 A6 1.5uL 1869.15 1 (Intercept) 8.65 0.87 0.00 ***
505 A6 2.0uL 2321.93 1 (Intercept) 9.11 0.65 0.00 ***
506 A6 3uL 4059.19 1 (Intercept) 10.30 0.11 0.00 ***
514 A6 0.5uL 135.95 1 (Intercept) 14.48 6.42 0.06 .
515 A6 0.5uL 251.49 1 (Intercept) 17.06 6.84 0.05 *
517 A6 0.5uL 397.95 1 (Intercept) 11.08 4.46 0.04 *
518 A6 1.0uL 1028.96 1 (Intercept) 13.21 4.39 0.01 *
519 A6 2.0uL 2044.82 1 (Intercept) 11.91 2.71 0.00 **
520 A6 3uL 3075.85 1 (Intercept) 11.22 1.48 0.00 ***
522 A6 0.5uL 446.71 1 (Intercept) 13.68 4.09 0.01 *
523 A6 1.0uL 1054.27 1 (Intercept) 13.10 3.62 0.00 **
524 A6 2.0uL 2062.26 1 (Intercept) 12.23 2.82 0.00 **
525 A6 3uL 3596.21 1 (Intercept) 10.72 0.88 0.00 ***
526 A6 0.2uL 57.90 1 (Intercept) 5.98 6.11 0.43
527 A6 0.5uL 476.66 1 (Intercept) 15.45 4.54 0.01 *
528 A6 1.0uL 1144.10 1 (Intercept) 12.47 3.02 0.00 **
529 A6 2.0uL 2158.59 1 (Intercept) 11.81 2.36 0.00 ***
530 A6 3uL 3608.38 1 (Intercept) 10.74 0.65 0.00 ***
491 A6 1.0uL 860.51 1 true.d13C 0.99 0.07 0.00 ***
492 A6 1.5uL 1473.38 1 true.d13C 0.97 0.04 0.00 ***
493 A6 2.0uL 1809.00 1 true.d13C 0.95 0.03 0.00 ***
494 A6 3uL 2594.96 1 true.d13C 0.97 0.03 0.00 ***
496 A6 0.5uL 354.69 1 true.d13C 1.01 0.12 0.00 ***
497 A6 1.0uL 992.99 1 true.d13C 0.99 0.06 0.00 ***
498 A6 1.5uL 1551.27 1 true.d13C 0.95 0.04 0.00 ***
499 A6 2.0uL 2001.39 1 true.d13C 0.95 0.03 0.00 ***
500 A6 3uL 2469.88 1 true.d13C 1.00 0.01 0.00 ***
502 A6 0.5uL 410.17 1 true.d13C 1.14 0.11 0.00 ***
503 A6 1.0uL 1057.31 1 true.d13C 0.95 0.04 0.00 ***
504 A6 1.5uL 1869.15 1 true.d13C 0.95 0.03 0.00 ***
505 A6 2.0uL 2321.93 1 true.d13C 0.96 0.02 0.00 ***
506 A6 3uL 4059.19 1 true.d13C 1.00 0.00 0.00 ***
514 A6 0.5uL 135.95 1 true.d13C 1.27 0.20 0.00 ***
515 A6 0.5uL 251.49 1 true.d13C 1.27 0.22 0.00 **
517 A6 0.5uL 397.95 1 true.d13C 1.05 0.14 0.00 ***
518 A6 1.0uL 1028.96 1 true.d13C 1.11 0.14 0.00 ***
519 A6 2.0uL 2044.82 1 true.d13C 1.05 0.09 0.00 ***
520 A6 3uL 3075.85 1 true.d13C 1.03 0.05 0.00 ***
522 A6 0.5uL 446.71 1 true.d13C 1.12 0.13 0.00 ***
523 A6 1.0uL 1054.27 1 true.d13C 1.10 0.11 0.00 ***
524 A6 2.0uL 2062.26 1 true.d13C 1.07 0.09 0.00 ***
525 A6 3uL 3596.21 1 true.d13C 1.01 0.03 0.00 ***
526 A6 0.2uL 57.90 1 true.d13C 0.87 0.20 0.05 *
527 A6 0.5uL 476.66 1 true.d13C 1.18 0.14 0.00 ***
528 A6 1.0uL 1144.10 1 true.d13C 1.08 0.10 0.00 ***
529 A6 2.0uL 2158.59 1 true.d13C 1.05 0.07 0.00 ***
530 A6 3uL 3608.38 1 true.d13C 1.01 0.02 0.00 ***

Parameters

data_params <- data_w_calibs %>% 
  # pull out remaining columns we want available (some might already be pulled out but that's okay)
  #Note: Garrett changed "Preparation" to "Seed Oxidation" - we may want a different shape variable
  iso_unnest_calib_data(select = c(file_datetime, ampl_sample_mean.mV, `Seed Oxidation`, is_standard)) 
  
# visualize the delta calibration fits
data_params %>% 
  #NEED TO FILTER OUT SAMPLES iso_filter_files(`Identifier 1` == "A5") 
  iso_visualize_delta_calib_fits(x = Analysis, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
                                 include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. analysis")

data_params %>% 
  iso_visualize_delta_calib_fits(x = ampl_sample_mean.mV, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
                                 include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. amplitude") 

data_params %>% 
  iso_visualize_delta_calib_fits(x = file_datetime, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
                                 include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. time")

# turn the last plot into an interactive one
ggplotly(ggplot2::last_plot() + theme(legend.position = "none"))
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Compounds

data_w_calibs %>% 
  # pull out relevant data
  iso_unnest_calib_data(select = everything()) %>% 
  filter(is_standard) %>% 
  # calculate deviation from means
  group_by(Analysis) %>% 
  mutate(
    `Var: residual d13C [permil]` = resid_d13C,
    `Var: area diff from mean [%]` = (`Intensity 44`/mean(`Intensity 44`) - 1) * 100,
    `Var: amplitude diff from mean [%]` = (`Ampl44`/mean(`Ampl44`) - 1) * 100
  ) %>%
  # visualize
  iso_visualize_data(x = compound, y = starts_with("Var"), group = Analysis, color = `Identifier 2`)

ggplotly(ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`